##### Step 1: load data and packages #####



library(tidyverse)
library(lubridate)
library(data.table)
library(haven)
library(fixest)
library(magrittr)
library(cobalt)        
library(xtable)

base_dir <- "..." # Manually change home folder 
data_dir <- paste0(base_dir, "data/")

## function to more easily specify controls in regression formulas
fixest_form <- function(
  y,
  x, 
  fe="0",
  x_re = FALSE,
  df = NULL
  ) {

  if (x_re & !is.null(df)) {
    x <- x %>% map(~ grep(., colnames(df), value=T)) %>% reduce(c)
  }
  
  xpart <- paste(x, collapse = " + ")
  fepart <- paste(fe, collapse = " + ")



  y %>% 
    paste(paste(xpart, fepart, sep="|"), sep=" ~ ") %>%
    as.formula

}

setFixest_etable(fitstat = ~ n + r2)

tablestyle  = style.tex(main="aer",
                 fixef.suffix = " FEs",
                 fixef.where ="var",
                 fixef.title = "",
                 stats.title = "\\midrule",
                 tablefoot=F,
                 yesNo="\\checkmark")

chpos <- readRDS(paste0(data_dir, "cable_zip.rds"))
load(paste0(data_dir, "zip_demographics_2010.RData"))

census2010 %>% setDT
chpos <- census2010[chpos, on = .(zipcode)]
chpos[,zip:=as.integer(zipcode)]

# read primary votes at zip level
primary <- readRDS(paste0(data_dir, "zip_vote.rds"))
primary[,zip:=as.integer(zip)]

chpos <- chpos[position_year==2009]
chpos_orig <- chpos
chpos <- chpos[primary, on = .(zip)]

# tea party candidates
tea_party_candidates <- read.csv(paste0(data_dir, "tea-party-candidates-with-time-varying-affiliation2.csv"))
tea_party_incumbent <- 
  tea_party_candidates %>% 
  dplyr::select(district, tea_party_incumbent = is_incumbent)
chpos %<>% left_join(tea_party_incumbent, by="district")





##### Step 2: compare demographic balance in zip codes w/ and w/o primary data #####



chpos_orig %<>% mutate(has.primary.data = zip %in% chpos$zip)
chpos_orig[, has.primary.data := as.factor(has.primary.data)]
demog_controls <- c(
  "pct_black", "pct_asian", "pct_other", "pct_hisp",
  "pct_male",
  "pct_age_10s", "pct_age_20s", "pct_age_30s", "pct_age_40s", "pct_age_50s", "pct_age_60s", "pct_age_70s", "pct_age_80s",
  "i(income_dec)",
  "pct_hsgrad", "pct_somecollege", "pct_bach", "pct_postgrad",
  "pct_suburban", "pct_urban",
  "totpop" 
)
covariates <- c(setdiff(demog_controls, "i(income_dec)"), "income_dec")
fml <- as.formula(paste("has.primary.data ~", paste(covariates, collapse = " + ")))
chpos_orig_clean <- chpos_orig[, .SD[complete.cases(.SD)], .SDcols = c("has.primary.data", covariates)]
fml <- as.formula(paste("has.primary.data ~", paste(covariates, collapse = " + ")))
bal <- bal.tab(fml, data = chpos_orig_clean, un = TRUE)
get_pvals <- function(data, treatment, vars) {
  data <- data.frame(data)
  data[[treatment]] <- as.factor(data[[treatment]])

  sapply(vars, function(v) {
    tryCatch({
      t.test(data[[v]] ~ data[[treatment]])$p.value
    }, error = function(e) NA)
  })
}
pvals <- get_pvals(chpos_orig_clean, "has.primary.data", covariates)
smds <- data.frame(
  variable = rownames(bal$Balance),
  smd = bal$Balance$Diff.Un
)
result <- smds %>%
  mutate(p_value = signif(pvals[variable], 3)) %>%
  arrange(desc(abs(smd)))
group_stats <- chpos_orig_clean %>%
  dplyr::select(has.primary.data, all_of(covariates)) %>%
  pivot_longer(cols = -has.primary.data, names_to = "variable", values_to = "value") %>%
  group_by(variable, has.primary.data) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    sd = sd(value, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  pivot_wider(
    names_from = has.primary.data,
    values_from = c(mean, sd),
    names_glue = "{.value}_group_{has.primary.data}"
  )
options(scipen = 999)
group_stats %<>% mutate(across(2:(ncol(.)), function(x) {as.character(round(x, 3))}))
group_stats$mean_group_TRUE[group_stats$variable=="pct_hisp"] <- paste0(group_stats$mean_group_TRUE[group_stats$variable=="pct_hisp"], "0")
group_stats$sd_group_FALSE[group_stats$variable=="pct_hisp"] <- paste0(group_stats$sd_group_FALSE[group_stats$variable=="pct_hisp"], "0")
group_stats$sd_group_TRUE[group_stats$variable=="pct_black"] <- paste0(group_stats$sd_group_TRUE[group_stats$variable=="pct_black"], "0")
group_stats[nrow(group_stats), 2:ncol(group_stats)] <-
  group_stats[nrow(group_stats), 2:ncol(group_stats)] %>%
  mutate(across(everything(), ~ format(round(as.numeric(.x), 0), big.mark = ",")))
group_stats %<>% mutate(
  group_TRUE = paste0(mean_group_TRUE, " (", sd_group_TRUE, ") "),
  group_FALSE = paste0(mean_group_FALSE, " (", sd_group_FALSE, ") ")
)
result %<>% left_join(group_stats[, c("variable", "group_TRUE", "group_FALSE")], by="variable")
result %<>% dplyr::select(variable, group_TRUE, group_FALSE, smd, p_value)
result$smd %<>% round(3)
result$p_value %<>% round(3)
result %<>% rename(
  Variable = variable,
  `With voting data` = group_TRUE,
  `Without voting data` = group_FALSE,
  `Standardized Mean Difference` = smd,
  `p-value` = p_value
)
result <- result[match(covariates, result$Variable), ]
result %<>% mutate(
  Variable = case_when(
    Variable == "pct_black"        ~ "Percent Black",
    Variable == "pct_asian"        ~ "Percent Asian",
    Variable == "pct_other"        ~ "Percent Other",
    Variable == "pct_hisp"         ~ "Percent Hispanic",
    Variable == "pct_male"         ~ "Percent Male",
    Variable == "pct_age_10s"      ~ "Percent Age 10s",
    Variable == "pct_age_20s"      ~ "Percent Age 20s",
    Variable == "pct_age_30s"      ~ "Percent Age 30s",
    Variable == "pct_age_40s"      ~ "Percent Age 40s",
    Variable == "pct_age_50s"      ~ "Percent Age 50s",
    Variable == "pct_age_60s"      ~ "Percent Age 60s",
    Variable == "pct_age_70s"      ~ "Percent Age 70s",
    Variable == "pct_age_80s"      ~ "Percent Age 80s",
    Variable == "income_dec"       ~ "Income Decile",
    Variable == "pct_hsgrad"       ~ "Percent High School Graduates",
    Variable == "pct_somecollege"  ~ "Percent Some College",
    Variable == "pct_bach"         ~ "Percent Bachelor's Degree",
    Variable == "pct_postgrad"     ~ "Percent Postgraduate Degree",
    Variable == "pct_suburban"     ~ "Percent Suburban",
    Variable == "pct_urban"        ~ "Percent Urban",
    Variable == "totpop"           ~ "Total Population",
    TRUE                           ~ Variable  # fallback for unmatched
  )
)
xtab <- xtable(result, caption = "Balance Test: Demographic Differences by Matched Precinct-Level Election Data Availability", label = "table:covbal")
align(xtab) <- c("l", "l", rep("c", ncol(result) - 1))

# Print Table A.1 -----
print.xtable(xtab,
      include.rownames = FALSE,
      caption.placement = "top",
      label = "table:covbal",
      floating = TRUE
      # file = paste0(table_dir, "zipvote-covbal.tex")
      )





##### Step 3: base regressions #####



base_controls <- c(
  "pos_lin_msnbc",
  "i(chan_config)",
  "num_channels",
  "num_bc")

demog_controls <- c(
                    "pct_black", "pct_asian", "pct_other", "pct_hisp",
                    "pct_male",
                    "pct_age_10s", "pct_age_20s", "pct_age_30s", "pct_age_40s", "pct_age_50s", "pct_age_60s", "pct_age_70s", "pct_age_80s",
                    "i(income_dec)",
                    "pct_hsgrad", "pct_somecollege", "pct_bach", "pct_postgrad",
                    "pct_suburban", "pct_urban",
                    "totpop" #add total pop
)

f_share0 <- fixest_form(y="tea_party_share", x=c("pos_lin_fnc", "pos_lin_msnbc"), fe="district")
f_share <- fixest_form(y="tea_party_share", x=c("pos_lin_fnc", base_controls), fe="district")
f_share_demo <- fixest_form(y="tea_party_share", x=c("pos_lin_fnc", base_controls, demog_controls), fe="district")

m_share0 <- feols(f_share0, data = chpos[position_year == 2009], weight = ~ total_vote, cluster="stid")
m_share <- feols(f_share, data = chpos[position_year == 2009], weight = ~ total_vote, cluster="stid")
m_share_demo <- feols(f_share_demo, data = chpos[position_year == 2009], weight = ~ total_vote, cluster="stid")

setFixest_dict(c(zip="Zip",
                 district="District",
                 pos_lin_fnc = "FNC Channel Pos.",
                 pos_lin_fnc_appearance = "FNC Channel Pos. $\\times$ FNC Appearance",
                 pos_lin_fnc_appearance_pre_primary = "FNC Channel Pos. $\\times$ Pre-Primary FNC Appearance",
                 pos_lin_msnbc = "MSNBC Channel Pos.",
                 tea_party_share="Tea Party Cand. Vote Share",
                 total_vote="Total Votes Cast in Republican Primary",
                 dmacode = "Designated Market Area (DMA)",
                 stid = "Cable System",
                 turnout_eligible = "Primary Turnout Rate Proxy (# Votes Cast / # Age-Eligible Residents)"
),
reset=T
)

if(length(m_share$obs_selection$obsRemoved)==0) {
  mean_y1 <- weighted.mean(chpos$tea_party_share[chpos$position_year == 2009], weight=chpos$total_vote[chpos$position_year == 2009], na.rm=TRUE)
} else {
  mean_y1 <- weighted.mean(chpos$tea_party_share[setdiff(1:nrow(chpos), -m_share$obs_selection$obsRemoved)], weight=chpos$total_vote[chpos$position_year == 2009], na.rm = TRUE)
}
mean_y2 <- weighted.mean(chpos$tea_party_share[setdiff(1:nrow(chpos), -m_share_demo$obs_selection$obsRemoved)], weight=chpos$total_vote[chpos$position_year == 2009], na.rm = TRUE)


# Print Table 9 -----

print(etable(m_share, m_share_demo,
       extralines = list("__Mean of DV" = c(sprintf("%.3f", mean_y1), sprintf("%.3f", mean_y2))),
    # file = paste0(table_dir, "primary_voteshare.tex"),
    title = "Exposure to Fox News Increased Precinct Primary Vote Shares for Tea Party Candidates",
    cluster=~stid,
    drop = "(Intercept)",
    group=list("\\midrule Cable system controls"=c("chan", base_controls),
               "Demographic controls"=c("income", demog_controls)),
    label = "table:primary",
    digits=3,
    digits.stats=2,
    style.tex=tablestyle,
    replace = T
  ))



# Print Figure H.2.1 -----

w_used <- as.numeric(stats::weights(m_share_demo))
plot_df <- chpos[position_year == 2009]
plot_df$w_used <- w_used

resid_y <- resid(feols(fixest_form(y="tea_party_share", 
                           x=c(
                             # "pos_lin_fnc",
                             base_controls, demog_controls), fe="district"),
               plot_df, weight = ~ w_used, cluster="stid"))
resid_x <- resid(feols(fixest_form(y="pos_lin_fnc", 
                                   x=c(
                                     # "pos_lin_fnc",
                                     base_controls, demog_controls), fe="district"),
                       plot_df, weight = ~ w_used, cluster="stid"))
X = "FNC Cable Channel Position"
Y = "Primary Vote Shares for Tea Party Candidates"
ggplot(data.frame(resid_y, resid_x), aes(x = resid_x, y = resid_y)) +
  geom_point(aes(size = w_used[!is.na(w_used)]), alpha = 0.1) +
  scale_size_continuous(name = "Precinct Total Votes") + 
  geom_smooth(aes(weight = w_used[!is.na(w_used)]), method = "lm", se = FALSE, color = "blue") +
  labs(
    x = paste0("Residualized ", X),
    y = paste0("Residualized ", Y)
  ) +
  theme_light() +
  theme(    axis.text = element_text(size=12),
            axis.title = element_text(size=15),
            legend.title = element_text(size=12),
            legend.text = element_text(size=12)) 
# ggsave(filename = paste0(plot_dir, "residualized_scatterplot_tea_party_share_on_channelpos_weighted.pdf"), height = 6, width = 8)



# Print Table H.1.1 -----

dv = "tea_party_share"
  
  assign(paste0(dv, "_f_reduced"), fixest_form(y=dv, x=c("pos_lin_fnc", base_controls), fe="district"))
  
  full_model <- m_share_demo
  rows.to.remove <- -full_model$obs_selection$obsRemoved
  
  df <- chpos[position_year == 2009]
  
  assign(paste0(dv, "_m_reduced"), feols(get(paste0(dv, "_f_reduced")), 
                                         data=df[setdiff(1:nrow(df), rows.to.remove), ], weight = ~total_vote, cluster="stid"))
  
  # calculate oster ratio
  reduced_model <- get(paste0(dv, "_m_reduced"))
  coef_reduced <- reduced_model$coefficients["pos_lin_fnc"]
  r2_reduced <- as.numeric(fitstat(reduced_model, "wr2"))
  coef_full <- full_model$coefficients["pos_lin_fnc"]
  r2_full <- as.numeric(fitstat(full_model, "wr2"))
  oster_ratio <- coef_full*(r2_full-r2_reduced)/((coef_reduced - coef_full)*(min(1, 1.3*r2_full)-r2_full))
  assign(paste0(dv, "_oster_ratio"), oster_ratio)
  print(oster_ratio)
  
  # calculate oster bounds
  coef_bound <- coef_full - (coef_reduced-coef_full)*(min(1, 1.3*r2_full)-r2_full)/(r2_full-r2_reduced)
  print(coef_bound)
  if(coef_bound >= coef_full) {
    if(round(coef_full, digits=1)==0) {
      oster_bound <- paste0("[", as.character(signif(coef_full, digits=3)), ", ", as.character(signif(coef_bound, digit=3)), "]")
    } else{
      oster_bound <- paste0("[", as.character(round(coef_full, digits=1)), ", ", as.character(round(coef_bound, digit=1)), "]")
    }
   print(oster_bound)
  } else {
    if(round(coef_full, digits=1)==0) {
      oster_bound <- paste0("[", as.character(signif(coef_bound, digits=3)), ", ", as.character(signif(coef_full, digit=3)), "]")
    } else{
      oster_bound <- paste0("[", as.character(round(coef_bound, digits=1)), ", ", as.character(round(coef_full, digit=1)), "]")
    }
    print(oster_bound)
  }
  
  setFixest_etable(fitstat = ~ n + war2) # adjusted within R2
  
  tablestyle  = style.tex(main="aer",
                          fixef.suffix = " FEs",
                          fixef.where ="var",
                          fixef.title = "",
                          stats.title = "\\midrule",
                          tablefoot=F,
                          yesNo="\\checkmark")
  
  mean_y1 <- mean(df[[dv]][setdiff(1:nrow(df), -full_model$obs_selection$obsRemoved)], na.rm = TRUE)
  mean_y2 <- mean(df[[dv]][setdiff(1:nrow(df), -full_model$obs_selection$obsRemoved)], na.rm = TRUE)
  
  # file = paste0(dv, "_oster_ratio.tex")
  title = case_when(
    dv=="tea_party_share" ~ "Tea Party Cand. Vote Share",
    TRUE ~ NA_character_
  )
  title = paste0(title, " (for Oster Ratio Calculation)")
  label = paste0("table:", str_replace_all(dv, pattern="_", replacement="-"), "-oster-ratio")
  
  print(etable(reduced_model, full_model,
         extralines = list("__Mean of DV" = c(sprintf("%.3f", mean_y1), sprintf("%.3f", mean_y2))),
         # file = file,
         title = title,
         label = label,
         cluster=~stid,
         drop = "(Constant)",
         group=list("\\midrule Cable system controls"=c("chan", base_controls),
                    "Demographic controls"=c("income", demog_controls)),
         digits=3,
         digits.stats=2,
         style.tex=tablestyle,
         replace = T,
         depvar = F,
         placement = "H"
  ))
  




##### Step 4: additional regressions ######



chpos %<>% mutate(
  totvotingpop = totpop*(1-pct_age_0s-pct_age_10s)
)

chpos %<>% mutate(
  turnout_eligible = total_vote/totvotingpop,
  turnout_eligible = case_when(
    turnout_eligible<0 ~ 0,
    turnout_eligible>1 ~ 1,
    TRUE ~ turnout_eligible
  )
)

turnout_eligible_f_share0 <- fixest_form(y="turnout_eligible", x=c("pos_lin_fnc", "pos_lin_msnbc"), fe="district")
turnout_eligible_f_share <- fixest_form(y="turnout_eligible", x=c("pos_lin_fnc", base_controls), fe="district")
turnout_eligible_f_share_demo <- fixest_form(y="turnout_eligible", x=c("pos_lin_fnc", base_controls, demog_controls), fe="district")

turnout_eligible_m_share0 <- feols(turnout_eligible_f_share0, data = chpos[position_year == 2009 & !is.na(tea_party_share)], weight=~totvotingpop, cluster="stid")
turnout_eligible_m_share <- feols(turnout_eligible_f_share, data = chpos[position_year == 2009 & !is.na(tea_party_share)], weight=~totvotingpop, cluster="stid")
turnout_eligible_m_share_demo <- feols(turnout_eligible_f_share_demo, data = chpos[position_year == 2009 & !is.na(tea_party_share)], weight=~totvotingpop, cluster="stid")

if(length(turnout_eligible_m_share$obs_selection$obsRemoved)==0) {
  mean_y1 <- weighted.mean(chpos$turnout_eligible[chpos$position_year == 2009], weight=chpos$totvotingpop[chpos$position_year == 2009], na.rm=TRUE)
} else {
  mean_y1 <- weighted.mean(chpos$turnout_eligible[setdiff(1:nrow(chpos), -turnout_eligible_m_share$obs_selection$obsRemoved)], weight=chpos$totvotingpop[chpos$position_year == 2009], na.rm = TRUE)
}
mean_y2 <- weighted.mean(chpos$turnout_eligible[setdiff(1:nrow(chpos), -turnout_eligible_m_share_demo$obs_selection$obsRemoved)], weight=chpos$totvotingpop[chpos$position_year == 2009], na.rm = TRUE)


# Print Table H.3.1 -----

print(etable(turnout_eligible_m_share, turnout_eligible_m_share_demo,
       extralines = list("__Mean of DV" = c(sprintf("%.3f", mean_y1), sprintf("%.3f", mean_y2))),
       # file = paste0(table_dir, "primary_turnout_eligible_weighted.tex"),
       title = "FNC Effects on Turnout (Share of Age-Eligible Population) in Republican Primaries",
       cluster=~district,
       drop = "(Intercept)",
       group=list("\\midrule Cable system controls"=c("chan", base_controls),
                  "Demographic controls"=c("income", demog_controls)),
       label = "table:primary-turnout-eligible-weighted",
       digits=3,
       digits.stats=2,
       style.tex=tablestyle,
       replace = T,
       placement = "H"
))



